# Deployment "query-router": runs a single replica of the uvicorn-served
# RetrievalAugmentedGeneration.common.server:app application on port 8081.
# Values consumed from the chart:
#   .Values.query.image      - container image reference
#   .Values.query.gpu.type   - resource name used as the key under resources.limits
#   .Values.query.gpu.count  - quantity requested for that resource
apiVersion: apps/v1
kind: Deployment
metadata:
  name: query-router
  labels:
    app.kubernetes.io/name: query-router
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: query-router
  template:
    metadata:
      labels:
        # Must stay identical to spec.selector.matchLabels above.
        app.kubernetes.io/name: query-router
    spec:
      imagePullSecrets:
        - name: nvcrio
      volumes:
        # Memory-backed emptyDir; mounted at /dev/shm in the container below.
        - name: dshm
          emptyDir:
            medium: Memory
      containers:
        - name: query-router
          # `quote` keeps the rendered image reference a YAML string.
          image: {{ .Values.query.image | quote }}
          imagePullPolicy: IfNotPresent
          command:
            - uvicorn
            - RetrievalAugmentedGeneration.common.server:app
            - --port
            # Quoted so the argument is passed as a string, not an integer.
            - "8081"
            - --host
            - 0.0.0.0
          env:
            - name: APP_MILVUS_URL
              value: http://milvus:19530
            - name: APP_LLM_SERVERURL
              value: llm:8001
            - name: APP_LLM_MODELNAME
              value: ensemble
            - name: APP_LLM_MODELENGINE
              value: triton-trt-llm
            # TODO: APP_CONFIG_FILE should be supplied through a ConfigMap.
            # - name: APP_CONFIG_FILE
            #   value: ""
          ports:
            # Same port that uvicorn is told to bind via --port above.
            - containerPort: 8081
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
          resources:
            limits:
              # Resource name and quantity both come from chart values.
              {{ .Values.query.gpu.type }}: {{ .Values.query.gpu.count }}
---
# Service "query": forwards TCP traffic on port 8081 to the pods selected below.
# No `type` is set here, so the Kubernetes default Service type applies.
apiVersion: v1
kind: Service
metadata:
  name: query
spec:
  # Targets pods carrying the label app.kubernetes.io/name=query-router.
  selector:
    app.kubernetes.io/name: query-router
  ports:
    # Service port 8081 maps to port 8081 on the selected pods.
    - port: 8081
      targetPort: 8081
      protocol: TCP